% Driver script for the embeddedness analysis.
% Begins from an empty workspace and a cleared command window.
clear all
clc



% ---------------------------- input data ----------------------------
% Individual characteristics of the users considered in the analysis,
% i.e. the users for whom we have running activity data.
App_Users_Demographic = dataset('File', 'App_Users_in_Graph_demographics.csv');

% Running activity data. Example of the layout: distance_mat(3,1) is the
% distance that the individual with user number 1 runs on day 3.
load('distance_mat');
load('duration_mat');
load('run_mat');
load('pace_mat');
load('calories_mat');
load('TimeZone_mat');
load('StartTime_mat');

% Weather data.
load('PRECIPITATION_mat');
load('TMAX_mat');

% Running-buddy network.
load('USERREL_USEDFOR_SOCIAL_INFLUENCE_wth_correlations');


% Restrict every day-indexed series to the period that starts on the day the
% first link was created; the model is worked from that day onwards.
% NOTE(review): first_day_of_observations, last_day_of_observations and
% USERREL_USEDFOR_SOCIAL_INFLUENCE are not assigned in this script; presumably
% they come from one of the .mat files loaded earlier -- confirm.
daynumbers_consideration=(datenum(first_day_of_observations):datenum(last_day_of_observations))'; %column of consecutive day numbers
l=find(daynumbers_consideration==min(USERREL_USEDFOR_SOCIAL_INFLUENCE.created_datenumber)); %row of the day on which the first link was created

% An empty l would make l:end an empty index, so every series below would be
% silently reduced to zero rows. Stop with an explicit message instead.
if isempty(l)
    error('EmbeddednessAnalysis:firstLinkDayNotObserved', ...
          'The earliest link creation day number (%.10g) does not match any day in the observation window.', ...
          min(USERREL_USEDFOR_SOCIAL_INFLUENCE.created_datenumber));
end

% Drop all rows (days) before the first link from the day index and from every
% weather and running-activity matrix, keeping them aligned row by row.
daynumbers_consideration=daynumbers_consideration(l:end);
PRECIPITATION_mat=PRECIPITATION_mat(l:end,:);
TMAX_mat=TMAX_mat(l:end,:);
run_mat=run_mat(l:end,:);
distance_mat=distance_mat(l:end,:);
duration_mat=duration_mat(l:end,:);
pace_mat=pace_mat(l:end,:);
calories_mat=calories_mat(l:end,:);
TimeZone_mat=TimeZone_mat(l:end,:);
StartTime_mat=StartTime_mat(l:end,:);



%%%%%%%%%%%%%%% time-invariant covariates of individuals %%%%%%%%%
% Age: calendar year of the join date minus the year of birth.
age_vec = year(App_Users_Demographic.JOIN_DATE) - App_Users_Demographic.BIRTH_DATE_YEAR;

% Gender code: 1 = male, 2 = female, 3 = unidentified ('U'/'u' or blank).
% Entries matching none of these keep the initial value 0.
gender_vec = zeros(size(age_vec));
gender_vec(strcmpi(App_Users_Demographic.GENDER, 'M')) = 1;
gender_vec(strcmpi(App_Users_Demographic.GENDER, 'F')) = 2;
gender_vec(strcmpi(App_Users_Demographic.GENDER, 'U') | strcmp(App_Users_Demographic.GENDER, '')) = 3;

% Height, weight and device identifier are taken over unchanged.
height_vec = App_Users_Demographic.HEIGHT;
weight_vec = App_Users_Demographic.WEIGHT;
device_vec = App_Users_Demographic.DEVICE_ID;
%country
% Each list holds the raw COUNTRY spellings that are folded into one country.
% Matching is exact (case- and whitespace-sensitive), hence the many variants.
% NOTE(review): 'AUT' (in au) and 'GE' (in de) are the ISO codes of Austria and
% Georgia respectively -- confirm that mapping them here is intended.
usa={'US', 'USA', 'USA ', 'United States', 'United States of America','United+States', 'United+States+of+America', 'Usa', 'Us', 'uS', 'uSA','united States', 'united States of America', 'united states', 'united states of america', 'united+states', 'us', 'usa'};
uk={'England', 'GB', 'Great Britain', 'United Kingdom', 'Uk', 'uk','United+Kingdom', 'england', 'gB', 'gb', 'uK', 'united kingdom','united+kingdom'};
jp= {'JAPAN', 'JP', 'jp', 'jpn','japan'};
ca={ 'CA', 'CAN', 'CANADA', 'CAnada', 'Canada', 'Canada ', 'Canada+','ca', 'canada'};
de={'DE', 'GE', 'GER', 'Germany', 'de', 'germany','ge','ger'};
es={'ES', 'ESP', 'Es', 'Esp', 'Espa%C3%B1a', 'Espa&ntilde;a','catalonia', 'es', 'espa%C3%B1a', 'espa&ntilde;a', 'espa?a','SPAIN','spain'};
br={ 'BR', 'BRASIL', 'BRA', 'BRAZIL', 'Brasil', 'Brazil', 'bR', 'br', 'brasil'};
au={ 'AU', 'AUSTRALIA', 'AUT', 'Australia', 'Australia ', 'au', 'australia'};
fr= {'FR', 'FRANCE', 'France', 'fr', 'france'};
mx={ 'MX', 'MEXICO', 'MEX', 'Mexico', 'mexico', 'mx'};
nl={ 'NL', 'Netherlands', 'Nederland', 'nederland', 'netherlands','nl', 'the netherlands'};
tw={ 'TW','TAIWAN', 'Taiwan', 'tw','taiwan'};

% Country code = position of the alias list below:
% 1 usa, 2 uk, 3 jp, 4 ca, 5 de, 6 es, 7 br, 8 au, 9 fr, 10 mx, 11 nl, 12 tw.
% Users whose COUNTRY matches no alias keep the initial code 0.
country_vec=zeros(size(age_vec));
country_groups={usa, uk, jp, ca, de, es, br, au, fr, mx, nl, tw};
for country_code=1:numel(country_groups)
    group_aliases=country_groups{country_code};
    for i=1:length(group_aliases)
        country_vec(strcmp(App_Users_Demographic.COUNTRY,group_aliases{i}))=country_code;
    end
end
% Remove the helpers so the workspace holds the same variables as before.
clear country_groups country_code group_aliases





%First consider the case of an embedded neighborhood.
%Embeddedness_code is invoked without arguments, so it presumably reads the
%flag and all the data prepared above directly from this workspace -- confirm.
%NOTE(review): the flag is spelled "embeddedess" (sic); presumably
%Embeddedness_code reads it under exactly this name -- check before renaming.
embeddedess=1; 
Embeddedness_code

%Next consider the case in which there is no embeddedness.
%NOTE(review): this second run starts from whatever workspace state the first
%run left behind -- confirm that the first run does not modify the inputs.
embeddedess=0;
Embeddedness_code


%Output files: "embedded_data_embeddedness1.txt" for embeddedness=1
%              "embedded_data_embeddedness0.txt" for embeddedness=0

%The two txt files are then merged vertically into a single file called "embedded_data.txt".